{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 获取验证码数据"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pywinauto\n",
    "from pywinauto import clipboard\n",
    "import time"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "C:\\ProgramData\\Anaconda3\\lib\\site-packages\\pywinauto\\application.py:1032: UserWarning: 32-bit application should be automated using 32-bit Python (you use 64-bit Python)\n",
      "  UserWarning)\n"
     ]
    }
   ],
   "source": [
    "app = pywinauto.Application().connect(path=r\"C:\\同花顺软件\\同花顺\\xiadan.exe\",timeout=10)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "main = app.top_window()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "def get_left_menus_handle():\n",
    "    while True:\n",
    "        try:\n",
    "            handle = main.window(\n",
    "                control_id=129,\n",
    "                class_name='SysTreeView32'\n",
    "            )\n",
    "            # sometime can't find handle ready, must retry\n",
    "            handle.wait('ready', 2)\n",
    "            return handle\n",
    "        except:\n",
    "            pass"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "get_left_menus_handle().get_item(['查询[F4]', '资金股票']).click()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 验证码识别步骤"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "from PIL import Image\n",
    "import numpy as np\n",
    "import uuid"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "def split_and_save(path):\n",
    "    path = \"captchas/\" + path\n",
    "    pix = np.array(Image.open(path).convert(\"L\"))\n",
    "    # threshold image\n",
    "    pix = (pix < 200) * 255\n",
    "\n",
    "    col_ranges = [\n",
    "        [2, 2 + 9],\n",
    "        [18, 18 + 9],\n",
    "        [34, 34 + 9],\n",
    "        [50, 50 + 9]\n",
    "    ]\n",
    "    # split and save\n",
    "    for col_range in col_ranges:\n",
    "        letter = pix[:, col_range[0]: col_range[1]]\n",
    "        im = Image.fromarray(np.uint8(letter))\n",
    "        save_path = \"./letters/\" + str(uuid.uuid4()) + \".png\"\n",
    "        im.save(save_path)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAD4AAAAXCAIAAADcAGvtAAAB1ElEQVR4nO2WMU/CQBSA31mMBBKsmDACAzg4kLgykDiiiWFy7VZHWPgfdJQJViZDIo5GBlakMQ7QRGCjCaQlIVZtcy61QKHXUomNCd90bd+99+Xd9VqEMYb/yZ7XAu7ZqXuBzxgJ/CTZVpef+htMKGs9+aEpXrzpeUq5cP7QUUmBn97HQrbB9j74h8bTuCThOQOZHWACvc443VGNYKjIDVI4xvidrYygMoLKciELbH2MDaN16cBCJzSuTxWjpK70JLUlfekX0WDpSLkbkvvov2UimAmxNu126mOoU/mUf357OHuNBxPE3NlMBGcWpoDv1NmGcYa9jw/WoHF9qpjZoI7AywU6gLepbu+z7oQZzmq0n9xyA4GfoKqYlALLK7BVLHxW1TXuWb2OUQ7TJlJhzERw/BNVJ5z8S8e1WPqsqMtKDQ4uN136aKh3BoWO4lKPgLWPWV0YfLRclUjQPpA0wdVcAgQfs3pPUtfGraBxdRE1520WnE7cDIKP6YTRutIGedP0vj6SZ0xbZc/D9i+3rL2ACjKAoz1J8jGpUyc0gHX0YmT+6hjqY9TW85RyEbtvu3JTnZYBAKD1KJYd/TuQfBDe/a//PTt1L9ipe8EeQshrB5d8AxoATTXJypnaAAAAAElFTkSuQmCC\n",
      "text/plain": [
       "<PIL.PngImagePlugin.PngImageFile image mode=RGB size=62x23 at 0x657C2D0>"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "Image.open(\"captchas/00438176-1f34-431d-b6ef-36fcbb8d953c.png\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "00438176-1f34-431d-b6ef-36fcbb8d953c.png\n",
      "027f50b5-9cae-4e77-b9d7-dd97f85bb2c8.png\n",
      "0346e8e3-64c7-490b-867a-4114b1a05888.png\n",
      "04939206-43cb-47dc-aac7-0e4b36c08c4f.png\n",
      "067b9dd2-449e-4868-b9a3-bc1213f25035.png\n",
      "0793c96e-1c22-4a23-9339-147fe36c20c4.png\n",
      "079ff89c-ac91-4960-be78-c505731d52e3.png\n",
      "0addae1c-462a-40f4-8290-357f7dd4b24f.png\n",
      "0c5c0ab9-b9b3-43a1-87d8-e7673e518a03.png\n",
      "11ca34e7-f712-4b69-b453-c727ec547e44.png\n",
      "11efc48a-71ab-4a69-bf85-1a5495473451.png\n",
      "1240035b-2d22-4774-9e96-92de2aa5610f.png\n",
      "12d45039-5470-4f89-8e14-fad346f97dc2.png\n",
      "14e30f29-b785-4f9f-92b0-d9fb587dd2ce.png\n",
      "17623df6-399f-457f-bf55-a4d1c98f63fc.png\n",
      "1cc5d6d1-5c43-413b-a0d8-a755935c816b.png\n",
      "1df3616b-a00c-4d36-9157-b43d28f80e8a.png\n",
      "1f99fdaa-43cb-4871-ba35-b9e9ef53db47.png\n",
      "20047167-d7e4-4cd0-b2e1-046721a21bac.png\n",
      "212ae678-30f6-4e5d-8193-c5a562c49f07.png\n",
      "24467888-f89b-41cd-8b2d-77683b7d247a.png\n",
      "24c15218-9260-47d3-b827-d3f29c07d417.png\n",
      "2651fb7b-521c-4ef8-9e58-d524a86db666.png\n",
      "265952b8-1f3b-4b4f-8c6f-52f4b88117cf.png\n",
      "2df76314-a787-4aab-a5d3-f481cbd8bf12.png\n",
      "2f3a6325-9184-4f39-a551-b8250af12b8a.png\n",
      "31e8d34e-ba07-4ffe-97e1-52966cab3ead.png\n",
      "34736cf5-871c-4ca4-94e5-3ec784816676.png\n",
      "386b6324-2ca5-438a-8f04-795e18353dda.png\n",
      "38b3d292-7cc0-4d11-b048-192ea612ffb3.png\n",
      "39554b9c-b14e-48ea-aa4b-96b7722f6c39.png\n",
      "3ddf6579-2a59-4999-93c3-e81742e64615.png\n",
      "4433bc27-23a0-408e-81b0-490a362f2abf.png\n",
      "447be1d3-dbe5-4e5d-8bbd-62b01d40ebf6.png\n",
      "4643739b-e0f0-4e08-ab14-d89944319d5c.png\n",
      "471fe1f7-3d7a-4b21-8754-b8ef7c3910db.png\n",
      "4750c9c5-fd89-4eee-849a-8d08fdda10d7.png\n",
      "47c86809-65a4-4dc1-9f4c-1887559728e0.png\n",
      "49041894-7c5c-4df3-aa34-0d1d86cd2bfb.png\n",
      "558ecda1-b70d-4439-8c47-b8f4243a1202.png\n",
      "57efb97d-ec8f-43dd-a154-c40abfbb5c87.png\n",
      "58351e2e-6da5-4378-9677-f23e82bca56d.png\n",
      "5cbf066a-4fe9-423a-96b0-8bfa8867bcfe.png\n",
      "5d983161-02e4-479f-a7b2-1eb4816a2b41.png\n",
      "5e2448b5-a408-49bb-a1c0-b488828d3d23.png\n",
      "5e2e60bc-5a31-4134-ab4c-1da0b013ca38.png\n",
      "5fc1bf4b-28db-4582-980d-b1f51160ec23.png\n",
      "65fc9317-f8a9-4321-818c-64771c4db6a2.png\n",
      "67384b83-e283-472d-810b-9129a29083d1.png\n",
      "6764a52e-1e03-40aa-87a3-23f391985b2c.png\n",
      "6befd933-0b08-42e2-83ac-ba3fbcd50389.png\n",
      "6f7e1f12-8632-468b-abf4-8241a5ff9fb0.png\n",
      "7010afc1-b9a8-4ca6-b134-5097bda65572.png\n",
      "74f81104-f2a4-4427-af79-e6113be38e08.png\n",
      "784e5e14-f99e-4b7e-8cbb-1fee2a0494d0.png\n",
      "7aaa0522-85e0-4081-924c-e8a9ae3092d9.png\n",
      "7b9e64d8-8f76-45f9-ae90-3c2746fe12ab.png\n",
      "7bc99fa1-cc84-41d3-9975-2e9323a43b08.png\n",
      "7d812f0c-df2f-4216-8f9a-2cd101b68ac9.png\n",
      "7ec301f7-2464-4260-9393-1b07b1ab9d92.png\n",
      "80449e59-44ef-4dd8-a8ad-9988874945bb.png\n",
      "86a7fffc-eb47-4710-844f-f6ef55627200.png\n",
      "8792c038-e2f2-4ed1-ad2d-3a14f32b104a.png\n",
      "8ae3f2b3-bba0-4a70-be13-33ec5e93cc14.png\n",
      "8be1c798-56f7-4b86-be6f-8917eaf0012a.png\n",
      "8caca8ed-c010-4dce-b648-0ec609e26eda.png\n",
      "8e020f32-b33a-471a-bf1f-982aa2b32411.png\n",
      "8eb59138-65a5-460e-9bfa-02241087e6a8.png\n",
      "8fff98ed-7bb7-46c6-86c0-1b80dc5e2f9a.png\n",
      "90165165-2b02-468e-ba7b-220f3ea72cdb.png\n",
      "92083e65-03eb-411a-85b9-699ceb0808be.png\n",
      "9464b3ca-46eb-444b-a018-b1c6212d6bda.png\n",
      "95dd3dd2-627e-4973-be82-ed61fea45b0b.png\n",
      "97a464dc-2b0c-484b-a20e-d2e78705ad5d.png\n",
      "98217bf4-22f4-4c89-94a1-c912ef683d46.png\n",
      "9852a671-c4a2-4381-928f-79e3d3f8a642.png\n",
      "9d151b4a-63f0-446f-aaa0-344cca12ea56.png\n",
      "9de290f2-68c0-4895-a952-da0397dd7158.png\n",
      "9fe04dc6-9f93-4e9e-82c0-2490b53c1a2b.png\n",
      "a33fa702-6454-4c27-a47d-4c8ecb977b37.png\n",
      "a342dc64-1822-4c57-8c31-32c14fb740e9.png\n",
      "a6943b18-fbb1-486b-ab3c-ec764bcc35d8.png\n",
      "a900c451-9499-4714-8253-1f6aa45de3b8.png\n",
      "a9af1082-41d6-41d3-8ba2-630ac030c62a.png\n",
      "ac934d88-f5f5-4d12-b277-29aa620a3e18.png\n",
      "ad55d803-ec96-49be-bdde-e506a6d01166.png\n",
      "b0f60147-5ddb-40af-8b50-affa57b6690d.png\n",
      "b51f97c5-2f50-40cf-8206-4c4ad388e288.png\n",
      "b5e06b72-fed3-4950-826d-ba3abc06aeaa.png\n",
      "b8975785-f730-4e11-bfd3-29f28b63784c.png\n",
      "b9236b8a-8c5b-4fd6-ac72-e764d39de488.png\n",
      "b9af97f7-ffb0-4c4a-b1ec-6a43cc62e435.png\n",
      "b9d963e7-fcc2-42f9-bbe9-ac00ff2a2af2.png\n",
      "bb5f94f7-3d41-4256-89ec-1b91929bb17e.png\n",
      "be267d76-90b9-47b7-bb70-1fe101b4a2ed.png\n",
      "beceb7e2-984f-49b7-8730-f684ee48fe41.png\n",
      "c19309ce-d9f1-46a9-a10e-00de037aa518.png\n",
      "c4df5fa0-0db8-4493-823a-60ae7fd9e904.png\n",
      "c6143dd8-15ed-4c8d-92f0-c1c75480d8ea.png\n",
      "c67c01a5-9735-4fd4-be93-ec045843e30b.png\n",
      "c6879d95-1051-42ca-92d4-c283be3ce647.png\n",
      "c98e9ed1-4b4b-470e-9833-8573563880f8.png\n",
      "cb033795-2548-48d1-ad76-0c1b1e1a8ade.png\n",
      "cd417464-2145-4f37-9b9a-2af7053d70e1.png\n",
      "cda58b75-400a-4600-99fc-10212ca9ccec.png\n",
      "cdebcbe2-cd65-4b46-8984-3b4a6bdc2b96.png\n",
      "ce1fe204-f0f2-43b9-9a4f-6c98a173eb84.png\n",
      "d1be1b45-882b-4411-a0e5-8d54fbf0accc.png\n",
      "d242500a-243c-4073-9436-4209cbcf8d2f.png\n",
      "d31686aa-d5f0-4186-b6e6-1a8158b76660.png\n",
      "d5e29b32-78f9-428d-9cac-bd243b5f3bb8.png\n",
      "d719ae71-96ca-43ad-ab11-e89fe6400c4d.png\n",
      "d7d2ab99-d284-46d2-bf4e-30e5433e234f.png\n",
      "d8d153ce-b630-4bc0-9d3b-5716cb35f5ca.png\n",
      "da8689d2-484b-4bf0-937a-25921dc837cc.png\n",
      "dcd319c2-b84d-44de-ad3f-d573036c8b65.png\n",
      "de85981a-de38-47f4-b308-15d541a6505c.png\n",
      "e02b2e7a-4e59-4031-a283-1a7c94a207e5.png\n",
      "e25b7f06-6c18-478d-81f2-80f9da2c6a3d.png\n",
      "eb3e26e4-7208-47f3-82e4-282f50a3ccb6.png\n",
      "efafba71-b3b6-4c06-9c48-a03df0f1d1ab.png\n",
      "f28df1fa-914a-4a9f-802a-9c4eba46b4c8.png\n",
      "f2d8f56d-f2d5-4c1f-b108-f9b35feb858f.png\n",
      "f40242fe-a1a1-4712-b0bc-68ce922a4e33.png\n",
      "f58bb591-7684-440f-996e-ecf413e3f018.png\n",
      "f5bea90a-e819-4a61-992f-1e1ccafd28ab.png\n",
      "f6909361-50ec-4ab8-81bf-a874fc006890.png\n",
      "f7bacb21-bb12-4e11-b4f9-51c65ef6ddac.png\n",
      "f805e6c6-5931-443e-ba71-bf230ae8fd09.png\n",
      "f81dc072-7c66-44ad-bf66-996280f37274.png\n",
      "fac23036-6148-4aa9-bb56-06a0a2e0ee91.png\n",
      "fbdb62f0-8cce-4744-8357-76e2c0bebffc.png\n",
      "fde50a19-5e42-41d2-b048-3b5daf7455f8.png\n"
     ]
    }
   ],
   "source": [
    "im_paths = filter(lambda fn: os.path.splitext(fn)[1].lower() == '.png',\n",
    "                      os.listdir(\"captchas\"))\n",
    "\n",
    "for im_path in im_paths:\n",
    "    print(im_path)\n",
    "    split_and_save(im_path)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.neighbors import KNeighborsClassifier\n",
    "import pickle\n",
    "\n",
    "\n",
    "def load_dataset():\n",
    "    X = []\n",
    "    y = []\n",
    "\n",
    "    for i in range(60):\n",
    "        path = \"./dataset/%d%d.png\" % (i / 6, i % 6 + 1)\n",
    "        pix = np.array(Image.open(path).convert(\"L\"))\n",
    "        # print(pix.reshape(8*20).shape)\n",
    "        X.append(pix.reshape(23*9))\n",
    "        y.append(i/6)\n",
    "    return np.array(X), np.array(y)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',\n",
       "           metric_params=None, n_jobs=1, n_neighbors=5, p=2,\n",
       "           weights='uniform')"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "X, y = load_dataset()\n",
    "knn = KNeighborsClassifier(n_neighbors=5)\n",
    "knn.fit(X, y.astype(int))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [],
   "source": [
    "with open(\"captcha_model.pickle\", \"wb\") as f:\n",
    "    pickle.dump(knn, f)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [],
   "source": [
    "def get_model():\n",
    "    with open(\"captcha_model.pickle\", \"rb\") as f:\n",
    "        return pickle.load(f)\n",
    "\n",
    "\n",
    "def captcha_recognize(path):\n",
    "    model = get_model()\n",
    "    \n",
    "    pix = np.array(Image.open(path).convert(\"L\"))\n",
    "    # threshold image\n",
    "    pix = (pix < 200) * 255\n",
    "\n",
    "    col_ranges = [\n",
    "        [2, 2 + 9],\n",
    "        [18, 18 + 9],\n",
    "        [34, 34 + 9],\n",
    "        [50, 50 + 9]\n",
    "    ]\n",
    "    # split and save\n",
    "    letters = []\n",
    "    for col_range in col_ranges:\n",
    "        letter = pix[:, col_range[0]: col_range[1]]\n",
    "        letters.append(letter.reshape(9*23))\n",
    "        \n",
    "    res = model.predict(letters)\n",
    "    return \"\".join([str(ch) for ch in res])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'7317'"
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "captcha_recognize(\"captchas/00438176-1f34-431d-b6ef-36fcbb8d953c.png\")"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
